import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule


class DoubanSpider(CrawlSpider):
    """CrawlSpider for the Douban Movie Top 250 listing.

    Follows pagination links across the listing pages and, from each page,
    follows links into individual movie detail pages.
    """

    name = "douban"
    # NOTE(review): this commented-out domain refers to book.douban.com while
    # start_urls targets movie.douban.com — confirm which is intended before
    # re-enabling allowed_domains.
    # allowed_domains = ["book.douban.com"]
    start_urls = ["https://movie.douban.com/top250"]

    # Matches pagination URLs (e.g. "?start=25&filter=").
    link = LinkExtractor(allow=r'start=\d+&filter=')

    # Matches movie detail-page URLs.
    link_detail = LinkExtractor(allow=r'https://movie.douban.com/subject/\d+/')

    # An empty `allow` pattern extracts every URL on the page.
    # Kept for reference; deliberately NOT wired into `rules` below.
    link_all = LinkExtractor(allow=r'')

    rules = (
        # Follow pagination and parse each listing page.
        Rule(link, callback="parse_item", follow=True),
        # Bug fix: the detail-page extractor and its callback were defined but
        # never registered in `rules`, so parse_detail_item was dead code.
        # follow=False: detail pages are leaves, no further link extraction.
        Rule(link_detail, callback="parse_detail_item", follow=False),
    )

    def parse_item(self, response):
        """Parse one Top-250 listing page.

        Currently a stub: returns an empty item dict without extracting data.
        """
        item = {}
        return item

    def parse_detail_item(self, response):
        """Parse one movie detail page.

        Currently a stub: logs the response and returns an empty item dict.
        """
        item = {}
        print(response)
        return item

    def parse_all_item(self, response):
        """Callback intended for `link_all`; not referenced by any rule.

        Currently a stub: logs the response and returns an empty item dict.
        """
        item = {}
        print(response)
        return item
